# -*- coding: utf-8 -*-
"""
This code generates the figures used in the analysis of the nucleosome model (see Section SI.7).
The model was first introduced in:
Dodd IB, Micheelsen MA, Sneppen K, Thon G. Theoretical analysis of epigenetic cell 
memory by nucleosome modification. Cell. 2007 May 18;129(4):813-22. 
doi: 10.1016/j.cell.2007.02.053. PMID: 17512413.

Created on Mon Apr 22 10:28:59 2024
@author: Andrei Sontag
"""

from pathlib import Path

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
        
# Straight line in Euclidean coordinates
def func_str(x, a):
    """Return y on the straight line x + y = 1 - a.

    x may be a scalar or a NumPy array; a is the offset of the line.
    """
    intercept = 1 - a
    return intercept - x
        
# Hyperbola in Euclidean coordinates
def func_curv(x, a):
    """Return y on the hyperbola 1 - x - y - a*x*y = 0.

    Solved for y this reads y = (1 - x) / (1 + a*x); a = 0 gives the
    straight line y = 1 - x.
    """
    numerator = 1 - x
    denominator = 1 + a * x
    return numerator / denominator
        
# 1 - x - y - a*x*y = b (mix between straight line and hyperbola)
def func_curvb(x, a, b):
    """Return y on the curve 1 - x - y - a*x*y = b.

    Solved for y this reads y = (1 - x - b) / (1 + a*x). With b = 0 it is
    the pure hyperbola, with a = 0 the straight line y = 1 - b - x.
    """
    numerator = 1 - x - b
    denominator = 1 + a * x
    return numerator / denominator
        
# Straight line in projected coordinates
def funline(z, a):
    """Return a constant profile: a list holding `a` once per entry of z.

    Only the length of z is used; its values are ignored.
    """
    return [a] * len(z)
        
# Hyperbola in projected coordinates
def funcurv(z, a):
    """Evaluate the hyperbola 1 - x - y - a*x*y = 0 in projected coordinates.

    Writing s = x + y and z = x - y (so x*y = (s**2 - z**2)/4), the curve
    becomes a*s**2 + 4*s - 4 - a*z**2 = 0. The value returned is 1 - s for
    the root s = (-2 + sqrt(4 + 4*a + (a*z)**2)) / a.

    z may be a scalar or a NumPy array; a must be non-zero.
    """
    radicand = 4 + 4 * a + (a * z) ** 2
    total = (np.sqrt(radicand) - 2) / a
    return 1 - total
        
# 1 - x - y - a*x*y = b in projected coordinates
def funcurvb(z, a, b):
    """Evaluate the curve 1 - x - y - a*x*y = b in projected coordinates.

    Writing s = x + y and z = x - y (so x*y = (s**2 - z**2)/4), the curve
    becomes a*s**2 + 4*s - 4*(1 - b) - a*z**2 = 0. The value returned is
    1 - s for the root s = (-2 + sqrt(4 + 4*a*(1 - b) + (a*z)**2)) / a.

    z may be a scalar or a NumPy array; a must be non-zero.
    """
    radicand = 4 + 4 * a * (1 - b) + (a * z) ** 2
    total = (np.sqrt(radicand) - 2) / a
    return 1 - total

# ---------------------------------------------------------------------------
# Parameter sweep.
#
# For every index pair (m, h) the files nuc/nuc_<m>_<h>.txt and
# nuc/z_<m>_<h>.txt are read, and two numbers are stored:
#   * stab[m, h]: position of the maximum of the symmetrised z array,
#                 shifted by N and scaled by 1/N;
#   * curv[m, h]: fitted parameter `a` of funcurvb for the binned profile.
# The two tables are finally written to the CSV files 'stability' and
# 'curvature'.
# ---------------------------------------------------------------------------
N = 60
neighs = np.linspace(0, N, N + 1)   # row labels of the output tables
pvals = np.linspace(0, 1, 101)      # column labels of the output tables
n_neighs, n_pvals = len(neighs), len(pvals)
stab = np.zeros((n_neighs, n_pvals))
curv = np.zeros((n_neighs, n_pvals))

# Input directory, relative to the working directory. Built with pathlib so
# the script is not tied to Windows path separators.
data_dir = Path('nuc')

# --- Loop-invariant quantities ---------------------------------------------
# Alignment bins (i - j)/N for i, j in 0..N. Built from an integer range so
# the grid always has exactly 2N + 1 points and is exactly symmetric about
# zero (a float-step arange has a rounding-dependent length).
n_bins = 2 * N + 1
ratios = np.arange(-N, N + 1) / N

# Cell (i, j) of the data grid belongs to bin i - j + N, whose centre is
# (i - j)/N. Flattened in row-major order, i.e. i outer and j inner.
cell_i, cell_j = np.indices((N + 1, N + 1))
bin_of_cell = (cell_i - cell_j + N).ravel()
# Numerator of the per-cell factor (N - i - j)/N.
remainder = N - cell_i - cell_j

for m in range(n_neighs):
    for h in range(n_pvals):
        print(m, h)

        data = np.loadtxt(data_dir / f'nuc_{m}_{h}.txt', delimiter=',')
        z = np.loadtxt(data_dir / f'z_{m}_{h}.txt', delimiter=',')

        # Symmetrise z and record where its maximum sits.
        # assumes z has 2N + 1 entries, so the result lies in [-1, 1] -- TODO confirm
        z = z + z[::-1]
        stab[m, h] = (np.argmax(z) - N) / N

        # Accumulate, per alignment bin, the data-weighted sum of
        # (N - i - j)/N and the total weight. np.bincount adds the cells in
        # the same i-outer / j-inner order as an explicit double loop.
        # presumably data[i][j] is the weight (e.g. visit count) of state
        # (i, j); verify against the simulation that writes the files
        weights = data[:N + 1, :N + 1]
        mean_abr = np.bincount(
            bin_of_cell,
            weights=(weights * remainder / N).ravel(),
            minlength=n_bins,
        )
        # The counts deliberately start at one (as in the original script),
        # which also keeps the division below free of zero denominators.
        count_r = 1.0 + np.bincount(
            bin_of_cell, weights=weights.ravel(), minlength=n_bins
        )

        ### DATA ANALYSIS
        # Symmetrise the bins about zero alignment and take the mean per bin.
        sym_count = count_r + count_r[::-1]
        mean_abr2 = (mean_abr + mean_abr[::-1]) / sym_count

        # Get rid of points with <2 data points.
        # NOTE(review): because count_r starts at one, sym_count >= 2 for
        # non-negative data and this mask keeps every bin -- confirm whether
        # the filter is meant to be active.
        keep = sym_count > 1
        m_abr = mean_abr2[keep]
        rats = ratios[keep]

        # Drop the two extreme bins before fitting.
        xdata = rats[1:-1]  # alignment values
        ydata = m_abr[1:-1]

        # Only the optimal parameters are needed; popt = (a, b).
        popt, _pcov = curve_fit(funcurvb, xdata, ydata)
        curv[m, h] = popt[0]


cols = ['{0:0.2f}'.format(val) for val in pvals]
idxs = ['%d' % val for val in neighs]
# The stability table is stored with its sign flipped.
stabs = pd.DataFrame(-stab, columns=cols, index=idxs)
curvs = pd.DataFrame(curv, columns=cols, index=idxs)

stabs.to_csv('stability', sep=',', encoding='utf-8', index=True, header=True)
curvs.to_csv('curvature', sep=',', encoding='utf-8', index=True, header=True)